# coding:utf-8
from pyspark import SparkConf, SparkContext
from operator import add

if __name__ == '__main__':
    # Initialize the execution environment and build the SparkContext.
    conf = SparkConf().setMaster("local[*]").setAppName("BrowserCount")
    sc = SparkContext(conf=conf)

    # Read the raw CSV data (one record per line).
    file_rdd = sc.textFile("../csv文件/各城市游记浏览量/汇总.csv")

    # Split each line and extract (city, view_count) pairs.
    # BUG FIX: x[2] comes out of split() as a str; without int() the
    # reduceByKey below would concatenate strings instead of summing,
    # and sortBy would order lexicographically instead of numerically.
    split_rdd = file_rdd.map(lambda x: x.split(","))
    browser_rdd = split_rdd.map(lambda x: (x[0], int(x[2])))  # (key, value) = (city, views)

    # Sum views per city and sort by total views (ascending, as before).
    # Computed once and cached: the original built this pipeline twice
    # (once for take(), once for saveAsTextFile), re-reading and
    # re-shuffling the whole input.
    sorted_rdd = browser_rdd.reduceByKey(add).sortBy(lambda x: x[1])
    sorted_rdd.cache()

    # First 30 rows for console inspection; full result persisted to HDFS.
    result = sorted_rdd.take(30)
    sorted_rdd.saveAsTextFile(
        "hdfs://master:9000/user/travel_analyse/result")
    print(result)